#!/home/chunwei/chunenv/bin/python
# -*- coding: utf-8 -*-
import sys
import pandas as pd
import numpy as np
import datetime as d
import argparse
'''
Per-course event times: for every course, list the timestamps of its log events.
'''
def parse_args():
    """Parse the command-line arguments of the script.

    When the script is invoked without any argument, the usage/help message
    is printed and the process exits, exactly as if ``-h`` had been given.

    Returns:
        dict: ``{'log_path': ..., 'output': ...}`` holding the two positional
        arguments as strings.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('log_path', help='CSV log file to read')
    parser.add_argument('output', help='path of the CSV file to write')

    # Fall back to "-h" on a bare invocation by handing argparse an explicit
    # argument list, rather than appending to the process-wide sys.argv.
    argv = sys.argv[1:] or ['-h']
    return vars(parser.parse_args(argv))

# Timestamp layout used both to parse the log's ``time`` column and to render
# the timestamps in the output.
time_format = "%Y-%m-%dT%H:%M:%S"


def build_course_features(log):
    """Build the per-course event timestamp table from a log frame.

    Args:
        log: DataFrame with at least the columns ``course_id``, ``event`` and
            ``time``; ``time`` holds strings laid out as ``time_format``.
            The frame is not modified.

    Returns:
        DataFrame with one row per distinct course (in order of first
        appearance in ``log``) and the columns ``course_id``,
        ``event_overall_date`` and one ``event_<name>_date`` per distinct
        event (in order of first appearance). Each event cell is the
        space-joined, chronologically sorted list of timestamps of that event
        for the course, or an empty string when the course has none.
        ``overall`` covers every log row of the course.
    """
    dates = pd.to_datetime(log["time"], format=time_format)

    # course_id -> {event name (or 'overall') -> [timestamps]}
    course_event_date = {}
    # Explicit first-appearance lists keep the row/column order deterministic;
    # iterating a set of strings would change between interpreter runs.
    course_ids = []
    event_names = ["overall"]
    seen_events = {"overall"}

    for course_id, event, date in zip(log["course_id"], log["event"], dates):
        if course_id not in course_event_date:
            course_event_date[course_id] = {"overall": []}
            course_ids.append(course_id)
        if event not in seen_events:
            seen_events.add(event)
            event_names.append(event)
        rcd = course_event_date[course_id]
        rcd["overall"].append(date)
        rcd.setdefault(event, []).append(date)

    columns = {"course_id": course_ids}
    for event in event_names:
        columns["event_%s_date" % event] = [
            " ".join(
                date.strftime(time_format)
                for date in sorted(course_event_date[course_id].get(event, []))
            )
            for course_id in course_ids
        ]

    # Pass the column order explicitly instead of relying on dict ordering.
    header = ["course_id"] + ["event_%s_date" % event for event in event_names]
    return pd.DataFrame(columns, columns=header)


def main():
    """Read the log CSV named on the command line and write the feature CSV."""
    args = parse_args()
    log = pd.read_csv(args["log_path"])
    build_course_features(log).to_csv(args["output"], index=False)


if __name__ == "__main__":
    main()

